## Install required packages
install.packages("irr")
install.packages("dplyr")

## Import packages
library(irr)
library(dplyr)



## Import the dataset

# read_excel() lives in the readxl package, which this script never attaches,
# so the call is namespace-qualified to avoid a "could not find function"
# error.
data <- readxl::read_excel("data.xlsx")

## Explore the data

# Dimension names first, then frequency of each AREA value and a
# column-by-column summary of the whole table.
dimnames(data)
table(data[["AREA"]])
summary(data)

# Summary of the "Especialidad" (specialty) column on its own.
summary(data[["Especialidad"]])

# Row counts per AREA and per TIPO_ITEM (item type).
table(data[["AREA"]])
table(data[["TIPO_ITEM"]])

## Score of chatbots

# Each column is summed to give the total for one run of one chatbot.
# Runs tagged "best" are the ones the author marked as the top run.

# BingAI
sum(data[["BING_1M"]]) # best
sum(data[["BING_2"]])
sum(data[["BING_3"]])

# GPT-4
sum(data[["GPT4_1M"]]) # best
sum(data[["GPT4_2"]])
sum(data[["GPT4_3"]])

# GPT-3
sum(data[["GPT3_1"]])
sum(data[["GPT3_2"]]) # best
sum(data[["GPT3_3"]])

# Claude
sum(data[["CLAUDE_1"]]) # best
sum(data[["CLAUDE_2"]])
sum(data[["CLAUDE_3"]])

# Bard
sum(data[["BARD_1"]])
sum(data[["BARD_2"]]) # best
sum(data[["BARD_3"]])

# Recap: the best run of each chatbot, side by side.
sum(data[["BING_1M"]]) # best
sum(data[["GPT4_1M"]]) # best
sum(data[["GPT3_2"]]) # best
sum(data[["CLAUDE_1"]]) # best
sum(data[["BARD_2"]]) # best




# Concordance

## Concordance between BingAI

# The three BingAI runs; kappam.fleiss() treats every column it receives as
# one rater, so only these columns may be passed to it.
bing_runs <- c("BING_1M", "BING_2", "BING_3")

# data_BINGAI keeps AREA alongside the runs, but AREA is a grouping variable,
# not a rating. It is dropped before computing kappa; passing it through
# would make it count as a fourth rater.
data_BINGAI <- data[, c(bing_runs, "AREA")]
kappa_bingai <- kappam.fleiss(data_BINGAI[, bing_runs])
kappa_bingai


data_BINGAI2 <- data[, c(bing_runs, "TIPO_ITEM")]
data_BINGAI2


## CIRU for BingAI
data_BINGAI_CIRU <- subset(data, AREA == "CIRU", select = bing_runs)
kappa_bingai_ciru <- kappam.fleiss(data_BINGAI_CIRU)
kappa_bingai_ciru

## MED_INT for BingAI
data_BINGAI_MED_INT <- subset(data, AREA == "MED_INT", select = bing_runs)
kappa_bingai_med_int <- kappam.fleiss(data_BINGAI_MED_INT)
kappa_bingai_med_int

## PED for BingAI
data_BINGAI_PED <- subset(data, AREA == "PED", select = bing_runs)
kappa_bingai_ped <- kappam.fleiss(data_BINGAI_PED)
kappa_bingai_ped

## OBGYN for BingAI
data_BINGAI_OBGYN <- subset(data, AREA == "OBGYN", select = bing_runs)
kappa_bingai_obgyn <- kappam.fleiss(data_BINGAI_OBGYN)
kappa_bingai_obgyn

## SALUD_PUB for BingAI
data_BINGAI_SALUD_PUB <- subset(data, AREA == "SALUD_PUB", select = bing_runs)
kappa_bingai_salud_pub <- kappam.fleiss(data_BINGAI_SALUD_PUB)
kappa_bingai_salud_pub

## EMG for BingAI
data_BINGAI_EMG <- subset(data, AREA == "EMG", select = bing_runs)
kappa_bingai_emg <- kappam.fleiss(data_BINGAI_EMG)
kappa_bingai_emg

## Item type 1 (memorisation) for BingAI
# The label is matched literally, including the embedded double quotes.
data_BINGAI_I1 <- subset(
  data,
  TIPO_ITEM == "\"Memorización\"",
  select = bing_runs
)
kappa_bingai_I1 <- kappam.fleiss(data_BINGAI_I1)
kappa_bingai_I1

## Item type 2 (application of information) for BingAI
data_BINGAI_I2 <- subset(
  data,
  TIPO_ITEM == "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)",
  select = bing_runs
)
kappa_bingai_I2 <- kappam.fleiss(data_BINGAI_I2)
kappa_bingai_I2



## Concordance between GPT4

# The three GPT-4 runs; kappam.fleiss() treats every column it receives as
# one rater, so only these columns may be passed to it.
gpt4_runs <- c("GPT4_1M", "GPT4_2", "GPT4_3")

# data_GPT4 keeps AREA alongside the runs, but AREA is a grouping variable,
# not a rating. It is dropped before computing kappa; passing it through
# would make it count as a fourth rater.
data_GPT4 <- data[, c(gpt4_runs, "AREA")]
kappa_gpt4 <- kappam.fleiss(data_GPT4[, gpt4_runs])
kappa_gpt4

data_GPT42 <- data[, c(gpt4_runs, "TIPO_ITEM")]
data_GPT42

## CIRU for GPT4
# The subset is created before any use: the script previously referenced
# data_GPT4_CIRU one line before assigning it, which fails in a fresh session.
data_GPT4_CIRU <- subset(data, AREA == "CIRU", select = gpt4_runs)
kappa_gpt4_ciru <- kappam.fleiss(data_GPT4_CIRU)
kappa_gpt4_ciru

## MED_INT for GPT4
data_GPT4_MED_INT <- subset(data, AREA == "MED_INT", select = gpt4_runs)
kappa_gpt4_med_int <- kappam.fleiss(data_GPT4_MED_INT)
kappa_gpt4_med_int

## PED for GPT4
data_GPT4_PED <- subset(data, AREA == "PED", select = gpt4_runs)
kappa_gpt4_ped <- kappam.fleiss(data_GPT4_PED)
kappa_gpt4_ped

## OBGYN for GPT4
data_GPT4_OBGYN <- subset(data, AREA == "OBGYN", select = gpt4_runs)
kappa_gpt4_obgyn <- kappam.fleiss(data_GPT4_OBGYN)
kappa_gpt4_obgyn

## SALUD_PUB for GPT4
data_GPT4_SALUD_PUB <- subset(data, AREA == "SALUD_PUB", select = gpt4_runs)
kappa_gpt4_salud_pub <- kappam.fleiss(data_GPT4_SALUD_PUB)
kappa_gpt4_salud_pub

## EMG for GPT4
data_GPT4_EMG <- subset(data, AREA == "EMG", select = gpt4_runs)
kappa_gpt4_emg <- kappam.fleiss(data_GPT4_EMG)
kappa_gpt4_emg

## Item type 1 (memorisation) for GPT4
# The label is matched literally, including the embedded double quotes.
data_GPT4_I1 <- subset(
  data,
  TIPO_ITEM == "\"Memorización\"",
  select = gpt4_runs
)
kappa_gpt4_I1 <- kappam.fleiss(data_GPT4_I1)
kappa_gpt4_I1

## Item type 2 (application of information) for GPT4
data_GPT4_I2 <- subset(
  data,
  TIPO_ITEM == "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)",
  select = gpt4_runs
)
kappa_gpt4_I2 <- kappam.fleiss(data_GPT4_I2)
kappa_gpt4_I2



## GPT-3
## Concordance between GPT3

# The three GPT-3 runs; kappam.fleiss() treats every column it receives as
# one rater, so only these columns may be passed to it.
gpt3_runs <- c("GPT3_1", "GPT3_2", "GPT3_3")

# data_GPT3 keeps AREA alongside the runs, but AREA is a grouping variable,
# not a rating. It is dropped before computing kappa; passing it through
# would make it count as a fourth rater.
data_GPT3 <- data[, c(gpt3_runs, "AREA")]
kappa_gpt3 <- kappam.fleiss(data_GPT3[, gpt3_runs])
kappa_gpt3

data_GPT32 <- data[, c(gpt3_runs, "TIPO_ITEM")]
data_GPT32

## CIRU for GPT3
data_GPT3_CIRU <- subset(data, AREA == "CIRU", select = gpt3_runs)
kappa_gpt3_ciru <- kappam.fleiss(data_GPT3_CIRU)
kappa_gpt3_ciru

## MED_INT for GPT3
data_GPT3_MED_INT <- subset(data, AREA == "MED_INT", select = gpt3_runs)
kappa_gpt3_med_int <- kappam.fleiss(data_GPT3_MED_INT)
kappa_gpt3_med_int

## PED for GPT3
data_GPT3_PED <- subset(data, AREA == "PED", select = gpt3_runs)
kappa_gpt3_ped <- kappam.fleiss(data_GPT3_PED)
kappa_gpt3_ped

## OBGYN for GPT3
data_GPT3_OBGYN <- subset(data, AREA == "OBGYN", select = gpt3_runs)
kappa_gpt3_obgyn <- kappam.fleiss(data_GPT3_OBGYN)
kappa_gpt3_obgyn

## SALUD_PUB for GPT3
data_GPT3_SALUD_PUB <- subset(data, AREA == "SALUD_PUB", select = gpt3_runs)
kappa_gpt3_salud_pub <- kappam.fleiss(data_GPT3_SALUD_PUB)
kappa_gpt3_salud_pub

## EMG for GPT3
data_GPT3_EMG <- subset(data, AREA == "EMG", select = gpt3_runs)
kappa_gpt3_emg <- kappam.fleiss(data_GPT3_EMG)
kappa_gpt3_emg

## Item type 1 (memorisation) for GPT3
# The label is matched literally, including the embedded double quotes.
data_GPT3_I1 <- subset(
  data,
  TIPO_ITEM == "\"Memorización\"",
  select = gpt3_runs
)
kappa_gpt3_I1 <- kappam.fleiss(data_GPT3_I1)
kappa_gpt3_I1

## Item type 2 (application of information) for GPT3
data_GPT3_I2 <- subset(
  data,
  TIPO_ITEM == "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)",
  select = gpt3_runs
)
kappa_gpt3_I2 <- kappam.fleiss(data_GPT3_I2)
kappa_gpt3_I2



## Concordance between Claude

# The three Claude runs; kappam.fleiss() treats every column it receives as
# one rater, so only these columns may be passed to it.
claude_runs <- c("CLAUDE_1", "CLAUDE_2", "CLAUDE_3")

# data_Claude keeps AREA alongside the runs, but AREA is a grouping variable,
# not a rating. It is dropped before computing kappa; passing it through
# would make it count as a fourth rater.
data_Claude <- data[, c(claude_runs, "AREA")]
kappa_claude <- kappam.fleiss(data_Claude[, claude_runs])
kappa_claude

data_Claude2 <- data[, c(claude_runs, "TIPO_ITEM")]
data_Claude2

## CIRU for Claude
data_Claude_CIRU <- subset(data, AREA == "CIRU", select = claude_runs)
kappa_claude_ciru <- kappam.fleiss(data_Claude_CIRU)
kappa_claude_ciru

## MED_INT for Claude
data_Claude_MED_INT <- subset(data, AREA == "MED_INT", select = claude_runs)
kappa_claude_med_int <- kappam.fleiss(data_Claude_MED_INT)
kappa_claude_med_int

## PED for Claude
data_Claude_PED <- subset(data, AREA == "PED", select = claude_runs)
kappa_claude_ped <- kappam.fleiss(data_Claude_PED)
kappa_claude_ped

## OBGYN for Claude
data_Claude_OBGYN <- subset(data, AREA == "OBGYN", select = claude_runs)
kappa_claude_obgyn <- kappam.fleiss(data_Claude_OBGYN)
kappa_claude_obgyn

## SALUD_PUB for Claude
data_Claude_SALUD_PUB <- subset(data, AREA == "SALUD_PUB", select = claude_runs)
kappa_claude_salud_pub <- kappam.fleiss(data_Claude_SALUD_PUB)
kappa_claude_salud_pub

## EMG for Claude
data_Claude_EMG <- subset(data, AREA == "EMG", select = claude_runs)
kappa_claude_emg <- kappam.fleiss(data_Claude_EMG)
kappa_claude_emg

## Item type 1 (memorisation) for Claude
# The label is matched literally, including the embedded double quotes.
data_Claude_I1 <- subset(
  data,
  TIPO_ITEM == "\"Memorización\"",
  select = claude_runs
)
kappa_claude_I1 <- kappam.fleiss(data_Claude_I1)
kappa_claude_I1

## Item type 2 (application of information) for Claude
data_Claude_I2 <- subset(
  data,
  TIPO_ITEM == "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)",
  select = claude_runs
)
kappa_claude_I2 <- kappam.fleiss(data_Claude_I2)
kappa_claude_I2


## Concordance between BARD

# The three Bard runs; kappam.fleiss() treats every column it receives as
# one rater, so only these columns may be passed to it.
bard_runs <- c("BARD_1", "BARD_2", "BARD_3")

# data_BARD keeps AREA alongside the runs, but AREA is a grouping variable,
# not a rating. It is dropped before computing kappa; passing it through
# would make it count as a fourth rater.
data_BARD <- data[, c(bard_runs, "AREA")]
kappa_bard <- kappam.fleiss(data_BARD[, bard_runs])
kappa_bard

data_BARD2 <- data[, c(bard_runs, "TIPO_ITEM")]
data_BARD2

## CIRU for BARD
data_BARD_CIRU <- subset(data, AREA == "CIRU", select = bard_runs)
kappa_bard_ciru <- kappam.fleiss(data_BARD_CIRU)
kappa_bard_ciru

## MED_INT for BARD
data_BARD_MED_INT <- subset(data, AREA == "MED_INT", select = bard_runs)
kappa_bard_med_int <- kappam.fleiss(data_BARD_MED_INT)
kappa_bard_med_int

## PED for BARD
data_BARD_PED <- subset(data, AREA == "PED", select = bard_runs)
kappa_bard_ped <- kappam.fleiss(data_BARD_PED)
kappa_bard_ped

## OBGYN for BARD
data_BARD_OBGYN <- subset(data, AREA == "OBGYN", select = bard_runs)
kappa_bard_obgyn <- kappam.fleiss(data_BARD_OBGYN)
kappa_bard_obgyn

## SALUD_PUB for BARD
data_BARD_SALUD_PUB <- subset(data, AREA == "SALUD_PUB", select = bard_runs)
kappa_bard_salud_pub <- kappam.fleiss(data_BARD_SALUD_PUB)
kappa_bard_salud_pub

## EMG for BARD
data_BARD_EMG <- subset(data, AREA == "EMG", select = bard_runs)
kappa_bard_emg <- kappam.fleiss(data_BARD_EMG)
kappa_bard_emg

## Item type 1 (memorisation) for BARD
# The label is matched literally, including the embedded double quotes.
data_BARD_I1 <- subset(
  data,
  TIPO_ITEM == "\"Memorización\"",
  select = bard_runs
)
kappa_bard_I1 <- kappam.fleiss(data_BARD_I1)
kappa_bard_I1

## Item type 2 (application of information) for BARD
data_BARD_I2 <- subset(
  data,
  TIPO_ITEM == "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)",
  select = bard_runs
)
kappa_bard_I2 <- kappam.fleiss(data_BARD_I2)
kappa_bard_I2






## Overall concordance, using the three run columns only

# These assignments overwrite kappa_gpt3, kappa_claude and kappa_bard with
# values computed from exactly the three run columns of each chatbot.

## Concordance between GPT-3

data_gpt_3 <- data[, c("GPT3_1", "GPT3_2", "GPT3_3")]
kappa_gpt3 <- kappam.fleiss(data_gpt_3)
kappa_gpt3


## Concordance between Claude

data_claude <- data[, c("CLAUDE_1", "CLAUDE_2", "CLAUDE_3")]
kappa_claude <- kappam.fleiss(data_claude)
kappa_claude


## Concordance between Bard

# The column vector must not end with a trailing comma: c() would stop with
# an "argument is empty" error.
data_bard <- data[, c("BARD_1", "BARD_2", "BARD_3")]
kappa_bard <- kappam.fleiss(data_bard)
kappa_bard


## GPT: frequency tables for the CERT, UTI and CLASE columns

table(data[["GPT_CERT"]])
table(data[["GPT_UTI"]])
table(data[["GPT_CLASE"]])


## Bing: frequency tables for the CERT, UTI and CLASE columns
table(data[["BING_CERT"]])
table(data[["BING_UTI"]])
table(data[["BING_CLASE"]])




# Attach bibliometrix and call its biblioshiny() entry point.
# NOTE(review): this looks unrelated to the chatbot analysis and presumably
# opens an interactive app that holds the session until it is closed --
# confirm whether it belongs in this script.
library("bibliometrix")
biblioshiny()




## Regression models

# Totals for the run of each chatbot that the author marked as best; these
# are the outcome columns used by the models below.
sum(data[["BING_1M"]]) # best
sum(data[["GPT4_1M"]]) # best
sum(data[["GPT3_2"]]) # best
sum(data[["CLAUDE_1"]]) # best
sum(data[["BARD_2"]]) # best


# Row and column names, as a reminder of the available predictors.
dimnames(data)

# Logistic regression for a correct answer

## Bing

# For each model, summary() reports estimates on the log-odds scale. The
# table that follows exponentiates the coefficients and their confint()
# limits straight from the fitted object, so nothing is copied by hand from
# console output. The "(Intercept)" row is the baseline odds, not an odds
# ratio.

# Correct answer by area.
model_bing1 <- glm(BING_1M ~ AREA, family = binomial, data = data)
summary(model_bing1)
exp(cbind(OR = coef(model_bing1), confint(model_bing1)))

# Correct answer by item type.
model_bing2 <- glm(BING_1M ~ TIPO_ITEM, family = binomial, data = data)
summary(model_bing2)
exp(cbind(OR = coef(model_bing2), confint(model_bing2)))

# Correct answer by PERU_RQ.
model_bing3 <- glm(BING_1M ~ PERU_RQ, family = binomial, data = data)
summary(model_bing3)
exp(cbind(OR = coef(model_bing3), confint(model_bing3)))


## GPT-4

# For each model, summary() reports estimates on the log-odds scale. The
# table that follows exponentiates the coefficients and their confint()
# limits straight from the fitted object, so the per-area values are labeled
# by coefficient name instead of by hand-written comments. The "(Intercept)"
# row is the baseline odds, not an odds ratio.
# model_gpt4 is reassigned for each predictor; after this section it holds
# the TIPO_ITEM model.

# Correct answer by area.
model_gpt4 <- glm(GPT4_1M ~ AREA, family = binomial, data = data)
summary(model_gpt4)
exp(cbind(OR = coef(model_gpt4), confint(model_gpt4)))

# Correct answer by PERU_RQ.
model_gpt4 <- glm(GPT4_1M ~ PERU_RQ, family = binomial, data = data)
summary(model_gpt4)
exp(cbind(OR = coef(model_gpt4), confint(model_gpt4)))

# Correct answer by item type.
model_gpt4 <- glm(GPT4_1M ~ TIPO_ITEM, family = binomial, data = data)
summary(model_gpt4)
exp(cbind(OR = coef(model_gpt4), confint(model_gpt4)))



## Claude

# For each model, summary() reports estimates on the log-odds scale. The
# table that follows exponentiates the coefficients and their confint()
# limits straight from the fitted object, so the per-area values are labeled
# by coefficient name instead of by hand-written comments. The "(Intercept)"
# row is the baseline odds, not an odds ratio.
# model_claude is reassigned for each predictor; after this section it holds
# the TIPO_ITEM model.

# Correct answer by area.
model_claude <- glm(CLAUDE_1 ~ AREA, family = binomial, data = data)
summary(model_claude)
exp(cbind(OR = coef(model_claude), confint(model_claude)))

# Correct answer by PERU_RQ.
model_claude <- glm(CLAUDE_1 ~ PERU_RQ, family = binomial, data = data)
summary(model_claude)
exp(cbind(OR = coef(model_claude), confint(model_claude)))

# Correct answer by item type.
model_claude <- glm(CLAUDE_1 ~ TIPO_ITEM, family = binomial, data = data)
summary(model_claude)
exp(cbind(OR = coef(model_claude), confint(model_claude)))




## Bard

# For each model, summary() reports estimates on the log-odds scale. The
# table that follows exponentiates the coefficients and their confint()
# limits straight from the fitted object, so the per-area values are labeled
# by coefficient name instead of by hand-written comments. The "(Intercept)"
# row is the baseline odds, not an odds ratio.
# model_bard is reassigned for each predictor; after this section it holds
# the TIPO_ITEM model.

# Correct answer by area.
model_bard <- glm(BARD_2 ~ AREA, family = binomial, data = data)
summary(model_bard)
exp(cbind(OR = coef(model_bard), confint(model_bard)))

# Correct answer by PERU_RQ.
model_bard <- glm(BARD_2 ~ PERU_RQ, family = binomial, data = data)
summary(model_bard)
exp(cbind(OR = coef(model_bard), confint(model_bard)))

# Correct answer by item type.
model_bard <- glm(BARD_2 ~ TIPO_ITEM, family = binomial, data = data)
summary(model_bard)
exp(cbind(OR = coef(model_bard), confint(model_bard)))



## GPT-3

# For each model, summary() reports estimates on the log-odds scale. The
# table that follows exponentiates the coefficients and their confint()
# limits straight from the fitted object. Computing them here, rather than
# pasting numbers from console output, guarantees the values belong to this
# model and not to another chatbot's fit. The "(Intercept)" row is the
# baseline odds, not an odds ratio.
# model_gpt3 is reassigned for each predictor; after this section it holds
# the TIPO_ITEM model.

# Correct answer by area.
model_gpt3 <- glm(GPT3_2 ~ AREA, family = binomial, data = data)
summary(model_gpt3)
exp(cbind(OR = coef(model_gpt3), confint(model_gpt3)))

# Correct answer by PERU_RQ.
model_gpt3 <- glm(GPT3_2 ~ PERU_RQ, family = binomial, data = data)
summary(model_gpt3)
exp(cbind(OR = coef(model_gpt3), confint(model_gpt3)))

# Correct answer by item type.
model_gpt3 <- glm(GPT3_2 ~ TIPO_ITEM, family = binomial, data = data)
summary(model_gpt3)
exp(cbind(OR = coef(model_gpt3), confint(model_gpt3)))
